
* Drop any dataset currently held in memory before the script loads its own data
clear
* Turn off the --more-- pause so the do-file runs through without waiting for a key press
set more off

/*********************************************************************************
Name: defaults.do

Data In: [Data/Original/patient_surveys_attempts.dta,
		  Data/Original/patient_surveys.dta,
		  Data/Intermediate/patient_controls.dta,
		  Data/Original/tb_registers.dta,
		  Data/Original/patient_start_dates.dta,
		  Data/Original/hw_roster.dta,
		  Data/Intermediate/verified_patients.dta,
		  Data/Intermediate/hw_covariates.dta,
		  Data/Intermediate/patients_summary_stats_defaults.dta,
		  Data/Original/centerwise_sheets.dta,
		  Data/Intermediate/hw_roster_by_month.dta,
		  Data/Intermediate/hw_roster_by_center.dta,
		  Data/Intermediate/hw_controls.dta]

Data Out: [Data/Intermediate/Table6_1_input.dta,
		   Data/Intermediate/TableB2_1_input.dta,
		   Data/Intermediate/Table6_3_input.dta,
		   Data/Intermediate/TableB2_2_input.dta,
		   Data/Intermediate/Table6_2_input.dta]

Results Out: [Results/Paper/Table3_PanelA.out,
			  Results/Appendix/TableC1_PanelA.log,
			  Results/Appendix/TableD1_PanelA.log,
			  Results/Appendix/TableE1_PanelA.out,
			  Results/Appendix/TableB1_PanelA.out,
			  Results/Appendix/TableA11_PanelA.out,
			  Results/Appendix/TableA3.out,
			  Results/Appendix/TableA4_PanelA.out,
			  Results/Appendix/TableA4_PanelB.out,
			  Results/Paper/Table6.out,
			  Results/Appendix/TableE4.out,
			  Results/Appendix/TableA9.out,
			  Results/Appendix/TableC4.log,
			  Results/Appendix/TableD4.log,
			  Results/Appendix/TableB2.out]

Purpose of do-file: Estimating the treatment's impact on default and on the quality of reporting on default

Organization: PART-1: Estimating the treatment's impact on default
			  PART-2: Identifying health worker and patient characteristics predicting default
			  PART-3: Estimating the treatment's impact on the quality of reporting on default
*********************************************************************************/

* Working directory: every relative path used below is resolved from ${DIRECTORY}
cd "${DIRECTORY}"


****************************************
*** PART-1 *** Estimating the treatment's impact on default
****************************************

** Assembling the patient-level analysis file

* Master file: survey attempts
use "Data/Original/patient_surveys_attempts.dta", clear

* Survey answers, matched on q1_ques_code x a02_entry_exit (all rows from both files are kept)
merge 1:1 q1_ques_code a02_entry_exit using "Data/Original/patient_surveys.dta", nogenerate

* Patient control variables (all rows from both files are kept)
merge m:1 q1_ques_code using "Data/Intermediate/patient_controls.dta", nogenerate

* TB register information: rows that exist only in the register file are discarded
merge m:1 q1_ques_code using "Data/Original/tb_registers.dta", keep(master match) nogenerate

* Treatment start dates (all rows from both files are kept)
merge m:1 q1_ques_code using "Data/Original/patient_start_dates.dta", nogenerate

* Health worker roster: only rows found in both files are kept
merge m:1 Unique_ID UID_Center using "Data/Original/hw_roster.dta", keep(match) nogenerate

* Verified-patient file: only rows found in both files are kept
merge 1:1 q1_ques_code a02_entry_exit using "Data/Intermediate/verified_patients.dta", keep(match) nogenerate

* Main set of patient controls (each covariate has a companion pat_dum_* variable)
global patient_controls "pat_male pat_age pat_caste_general pat_hindu pat_rw_both pat_ownhouse pat_hhd_size pat_migrate_always pat_migrate_6plus pat_time_to_center pat_dum_male pat_dum_age pat_dum_caste_general pat_dum_hindu pat_dum_rw_both pat_dum_ownhouse pat_dum_hhd_size pat_dum_migrate_always pat_dum_migrate_6plus pat_dum_time_to_center"

* Second set of patient controls
global patient_controls_2 "pat_caste_sc pat_elec pat_migrate_6plus pat_dum_caste_sc pat_dum_elec pat_dum_migrate_6plus"


** Creating the outcome variables

* Last day of taking pill

* Values treated as missing in the date components (system missing plus negative codes)
local invalid_codes ".,-999,-888,-555,-333,-111"

* If the month is available but the day is not, the day is set to the first of the month
replace j1_last_pill_dd = 1 if inlist(j1_last_pill_dd,`invalid_codes') & !inlist(j1_last_pill_mm,`invalid_codes')

* String copies of month / day / year, with the negative codes blanked out, used to build the date
foreach part in mm dd yy {

	local component j1_last_pill_`part'

	generate `component'_str = `component'
	replace `component'_str = . if inlist(`component'_str,`invalid_codes')
	tostring `component'_str, replace

}

* Assemble "month/day/year" and convert it to a Stata daily date
generate last_pill_date = date(j1_last_pill_mm_str + "/" + j1_last_pill_dd_str + "/" + j1_last_pill_yy_str, "MDY")
format last_pill_date %td

* Days between last pill and start of treatment (both end days counted, hence the + 1)

generate days_of_treatment = last_pill_date - treatment_start_date + 1

* Cleaning variable "Longest consecutive duration of missing pills"

* Distribution of the reported duration, separately for each reporting unit (1, 2, 3)
forvalues unit = 1/3 {
	tabulate j4_period_not_pill if j4_period_not_pill_unit == `unit'
}

* Correcting outliers: these specific durations are re-coded as being expressed in unit 1
replace j4_period_not_pill_unit = 1 if inlist(j4_period_not_pill, 60, 78) & j4_period_not_pill_unit == 2
replace j4_period_not_pill_unit = 1 if inlist(j4_period_not_pill, 14, 20, 38, 42) & j4_period_not_pill_unit == 3

* Converting all duration units to "days": unit 2 is multiplied by 7 and unit 3 by 30
* NOTE(review): only -999 and system missing are blanked here, whereas the date components above also
* treat -888, -555, -333 and -111 as missing -- confirm these codes never occur in j4_period_not_pill
generate period_not_pill_days = j4_period_not_pill if !inlist(j4_period_not_pill, -999, .)
replace period_not_pill_days = period_not_pill_days * 7 if j4_period_not_pill_unit == 2
replace period_not_pill_days = period_not_pill_days * 30 if j4_period_not_pill_unit == 3

* Defining treatment outcomes

* Conditions shared by several outcome definitions
local survey_done "inlist(q14_outcome_survey, 1, 2)"
local long_gap "period_not_pill_days >= 60 & period_not_pill_days ~= ."

* Patient death
generate died = (q14_outcome_survey == 9)

* Treatment completed: full medication reported (code 1), or code 3 with at least 120 days of treatment;
* never counted as completed when pills were missed for 60 days or more
generate treatment_complete = (`survey_done' & (j5_full_medication == 1 | (j5_full_medication == 3 & days_of_treatment >= 120 & days_of_treatment ~= .)))
replace treatment_complete = 0 if `long_gap'

* Treatment ongoing (code 2), unless pills were missed for 60 days or more
generate treatment_ongoing = (`survey_done' & j5_full_medication == 2)
replace treatment_ongoing = 0 if `long_gap'

* Default
// This definition uses all relevant variables in the questionnaire. We used the official definition by WHO:
// a patient is marked as default only if pills were missed for 60 days or more at any time during the treatment,
// or if the patient stopped taking pills before 120 days of treatment (a missing treatment duration is counted as stopping early).
generate default = (`survey_done' & j5_full_medication == 3 & (days_of_treatment < 120 | days_of_treatment == .))
replace default = 1 if inlist(j5_full_medication, 1, 2, 3) & `long_gap'

* Outcome unknown: survey outcome codes 3 to 8, or codes 1-2 that fit none of the categories above
generate outcome_unknown = (inlist(q14_outcome_survey, 3, 4, 5, 6, 7, 8) | (`survey_done' & treatment_complete == 0 & treatment_ongoing == 0 & default == 0))

* Checking that variables are mutually exclusive: exactly one of the five indicators must equal 1 in every row
egen treatment_outcome = rowtotal(died outcome_unknown treatment_complete treatment_ongoing default), missing

assert treatment_outcome == 1
drop treatment_outcome


** Regressions

* Defining sample: all verified patients, and using their exit survey
* sample_temp is 1 for rows with a02_entry_exit in {2, 3, 4, 5} and verified_pat == 1, and 0 otherwise

gen sample_temp = (a02_entry_exit == 2 | a02_entry_exit == 3 | a02_entry_exit == 4 | a02_entry_exit == 5) & verified_pat == 1

* Estimating the treatment's impact on TB treatment outcomes: Table 3, Panel A

* Sidak-Holm correction
* The adjusted p-values are computed on a scratch copy of the data (preserve/restore) and handed to the
* table code below through the locals default_p1, default_p2, ..., outcome_unknown_p2.

preserve

* For each outcome, store the p-value of the test on the treatment coefficient as a constant variable:
* <outcome>_p1 for the specification without patient controls, <outcome>_p2 for the one with controls
foreach y in default treatment_ongoing treatment_complete died outcome_unknown{
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 post_exp if sample_temp == 1, small cl(uid_cluster)   
	test treatment
	gen `y'_p1 = `r(p)'
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls if sample_temp == 1, small cl(uid_cluster)   
	test treatment
	gen `y'_p2 = `r(p)'
}	

* Reduce the data to a single row of p-values, then rearrange it so that each outcome is one row
* with the two specifications in columns pval_1 and pval_2
keep default_p* treatment_ongoing_p* treatment_complete_p* died_p* outcome_unknown_p*
gen id = 99
keep in 1
reshape long default_p treatment_ongoing_p treatment_complete_p died_p outcome_unknown_p, i(id) j(test) 
drop id
xpose, clear varname
* The first transposed row is removed; presumably it holds the reshape index "test" rather than p-values
drop in 1
rename v1 pval_1
rename v2 pval_2
rename _varname variable	
* "number" is created here and dropped below without being used in between
gen number = _N
* Step-down adjustment, done separately for each specification:
*   - sort the p-values in ascending order
*   - `y'_k is the number of rows minus the rank plus one (the smallest p-value gets the largest exponent)
*   - adjusted p-value = 1 - (1 - p)^k
*   - rows 2 onwards are raised to the previous row's adjusted value when that one is larger (monotonicity)
*   - adjusted values are capped at 1
foreach y in  pval_1 pval_2 {
	sort `y'
	gen `y'_k=(_N+1)-_n	
	generate `y'_sidak=1-(1-`y')^`y'_k
	replace  `y'_sidak=`y'_sidak[_n-1] if `y'_sidak[_n-1]>`y'_sidak in 2/L
	replace  `y'_sidak=1 if `y'_sidak>1 & `y'_sidak~=.
	drop `y'_k
}
drop number
order variable pval_1* pval_2*

* Copy each adjusted p-value into a local named <outcome>_p1 / <outcome>_p2 (stored as a string);
* locals are not affected by restore, so they remain available for the outreg2 calls that follow
foreach y in default_p treatment_ongoing_p treatment_complete_p died_p outcome_unknown_p {
	sum pval_1_sidak if variable=="`y'"
	local `y'1 = string(r(mean))
	sum pval_2_sidak if variable=="`y'"
	local `y'2 = string(r(mean))
}

restore

* Table 3, Panel A: two columns per outcome (without, then with patient controls).
* The very first outreg2 call creates the file; every later call appends a column.
* The Sidak-Holm p-values are read from the locals <outcome>_p1 and <outcome>_p2 computed above.

local write_mode replace

foreach outcome in default treatment_ongoing treatment_complete died outcome_unknown {

	* Mean of the outcome in the control group, within the estimation sample
	summarize `outcome' if treatment == 0 & sample_temp == 1
	local mean_control = r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table3_PanelA.out", `write_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control', "Sidak-Holm p-val", ``outcome'_p1') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local write_mode append

	* Strata fixed effects and patient controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table3_PanelA.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control', "Sidak-Holm p-val", ``outcome'_p2') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

}

* Wild cluster bootstrap or pairs cluster bootstrap: Table C1, Panel A, and Table D1, Panel A

preserve

keep if sample_temp == 1

* Patient controls under shortened names: same variables as $patient_controls with the "pat" prefix removed
global patient_controls_b "_male _age _caste_general _hindu _rw_both _ownhouse _hhd_size _migrate_always _migrate_6plus _time_to_center _dum_male _dum_age _dum_caste_general _dum_hindu _dum_rw_both _dum_ownhouse _dum_hhd_size _dum_migrate_always _dum_migrate_6plus _dum_time_to_center"

* Rename each pat_* control to its shortened name (only inside this preserve/restore block)
foreach suffix of global patient_controls_b {
	rename pat`suffix' `suffix'
}

* wild cluster bootstrap: for each outcome, first without and then with patient controls

log using "Results/Appendix/TableC1_PanelA.log", replace
foreach outcome in default treatment_ongoing treatment_complete died outcome_unknown {
	wildbootstrap regress `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp, cluster(uid_cluster) reps(5000) rseed(13915183)
	wildbootstrap regress `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)
}
log close

* pairs cluster bootstrap: same two specifications per outcome

log using "Results/Appendix/TableD1_PanelA.log", replace
foreach outcome in default treatment_ongoing treatment_complete died outcome_unknown {
	clustse regress `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
	clustse regress `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
}
log close

restore

* With second set of controls: Table E1, Panel A
* Two columns per outcome: strata fixed effects only, then adding $patient_controls_2.
* The first outreg2 call creates the file; all later calls append to it.

local write_mode replace

foreach outcome in default treatment_ongoing treatment_complete died outcome_unknown {

	* Mean of the outcome in the control group, within the estimation sample
	summarize `outcome' if treatment == 0 & sample_temp == 1
	local mean_control = r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableE1_PanelA.out", `write_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local write_mode append

	* Strata fixed effects and second set of patient controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls_2 if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableE1_PanelA.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

}

* Restricting the sample to patients detected before experiment start date: Table B1, Panel A
* Same two specifications as the main table, estimated on rows with post_exp == 0
* (post_exp is therefore not included as a regressor).

local write_mode replace

// "died" is left out of the outcome list because it takes value 0 for all patients in this subsample
foreach outcome in default treatment_ongoing treatment_complete outcome_unknown {

	* Mean of the outcome in the control group, within the restricted sample
	summarize `outcome' if treatment == 0 & sample_temp == 1 & post_exp == 0
	local mean_control = r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 if sample_temp == 1 & post_exp == 0, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableB1_PanelA.out", `write_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local write_mode append

	* Strata fixed effects and patient controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 $patient_controls if sample_temp == 1 & post_exp == 0, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableB1_PanelA.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

}

* Change in treatment impact over time: Table A11, Panel A

* Time elapsed between the experiment start date and the patient's treatment start date, divided by 100,
* and its interaction with the treatment indicator
generate time_from_exp = (treatment_start_date - expstartdate) / 100
generate treatment_time = treatment * time_from_exp

* Pieces shared by every column of the table
local a11_file "Results/Appendix/TableA11_PanelA.out"
local a11_rhs "treatment treatment_time time_from_exp final_stratum_id1-final_stratum_id13"
local a11_format "nolabel asterisk(se) nocons nonote se adec(3) dec(3) keep(treatment treatment_time)"

* First outcome (default): its first column creates the output file
summarize default if treatment == 0 & sample_temp == 1
local mean_control = r(mean)

ivreg2 default `a11_rhs' post_exp if sample_temp == 1, small cluster(uid_cluster)
quietly outreg2 treatment treatment_time using "`a11_file'", replace `a11_format' addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes)

ivreg2 default `a11_rhs' post_exp $patient_controls if sample_temp == 1, small cluster(uid_cluster)
quietly outreg2 treatment treatment_time using "`a11_file'", append `a11_format' addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes)

* Remaining outcomes: both columns are appended
foreach outcome in treatment_ongoing treatment_complete died outcome_unknown {

	summarize `outcome' if treatment == 0 & sample_temp == 1
	local mean_control = r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' `a11_rhs' post_exp if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment treatment_time using "`a11_file'", append `a11_format' addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes)

	* Strata fixed effects and patient controls
	ivreg2 `outcome' `a11_rhs' $patient_controls post_exp if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment treatment_time using "`a11_file'", append `a11_format' addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes)

}


****************************************
*** PART-2 *** Identifying health worker and patient characteristics predicting default
****************************************

** Identifying health worker characteristics predicting default: Table A3

preserve

* Attach health worker covariates; only rows found in both files are kept
merge m:1 Unique_ID using "Data/Intermediate/hw_covariates.dta", keep(match) nogenerate

* Groups of health worker characteristics used as regressors
global bl_social "male b_age caste_general caste_obc caste_sc caste_st caste_minority religion_hindu religion_muslim religion_oth"
global bl_education "twelveandbelow tertiary otherdip_nonform"
global bl_jobhistory "b_jobs_bef_oa b_inc_yy prev_job_TB social_sector other_income"
global bl_household "same_nbhd hhd_size live_alone elec tap tv fridge rent_thirdparty ownhouse"
global bl_techexp "comp_know net_know email_acc socnet_acc nbdays_inexp"

* Default regressed on all health worker characteristics, standard errors clustered on uid_cluster
* NOTE(review): "treatment" is passed to outreg2 although it is not a regressor in this model --
* confirm the exported table contains the intended coefficients
ivreg2 default $bl_social $bl_education $bl_jobhistory $bl_household $bl_techexp if sample_temp == 1, small cluster(uid_cluster)
quietly outreg2 treatment using "Results/Appendix/TableA3.out", replace nolabel asterisk(se) nocons nonote se dec(3)

restore


** Identifying patient characteristics predicting default and estimating impact on predicted default: Table A4, Panels A and B

preserve

// The non-cleaned versions of a3_age and i14_asha_counselor1 are dropped before merging in the dataset that contains the patient covariates
drop a3_age i14_asha_counselor1
merge 1:1 q1_ques_code a02_entry_exit using "Data/Intermediate/patients_summary_stats_defaults.dta", update replace nogenerate

// The exercise needs to be done based on patient characteristics as measured during their first survey completed, to be consistent with Table 2, which also considers the first survey completed

* Default is blanked on every row that is not an exit survey (a02_entry_exit outside 2-5) ...
replace default = . if !inlist(a02_entry_exit, 2, 3, 4, 5)

* ... and the exit-survey value is then copied to all rows of the same patient
bysort q1_ques_code: egen temp = max(default)
replace default = temp

* Keeping only one survey per patient and only verified patients
keep if first_complete == 1 & verified_pat == 1

global bl_social "male a3_age caste_dont_know caste_general caste_obc caste_sc caste_st caste_minority religion_hindu religion_muslim religion_oth"
global bl_education "rw_none rw_onlyread rw_both edu_belowprimary edu_primary edu_secondary edu_grad"
global bl_household "hhd_size live_alone elec tapwater tv fridge ownhouse migrate_always migrate_6plus migrate_lessthan5 i14_asha_counselor1 dist_center_winsor current_working"

// Categories excluded from the regressions below: caste_general religion_hindu rw_none edu_belowprimary migrate_always

* Regressor lists: model "b" uses a subset of controls, model "a" adds two more variables to that subset
local rhs_b "male a3_age caste_dont_know caste_obc caste_sc caste_st caste_minority religion_muslim religion_oth rw_onlyread rw_both edu_primary edu_secondary edu_grad hhd_size live_alone tapwater tv fridge ownhouse migrate_6plus migrate_lessthan5 current_working"
local rhs_a "`rhs_b' i14_asha_counselor1 dist_center_winsor"

* Each output file is created by its first outreg2 call and appended to afterwards
local panelA_mode replace
local panelB_mode replace

* Model a: regression including all controls; model b: regression including a subset of controls
foreach model in a b {

	* Panel A: default regressed on patient characteristics
	ivreg2 default `rhs_`model'', small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA4_PanelA.out", `panelA_mode' nolabel asterisk(se) nocons nonote se dec(3)
	local panelA_mode append

	* Fitted values from the regression just estimated
	predict default_predicted_`model'

	* Panel B: predicted default regressed on treatment, first for patients enrolled before
	* the beginning of the experiment (post_exp == 0), then for those enrolled after (post_exp == 1)
	forvalues cohort = 0/1 {

		summarize default_predicted_`model' if treatment == 0 & post_exp == `cohort'
		local mean_control = r(mean)

		* Kolmogorov-Smirnov test comparing the two treatment groups; its r(p) is reported as "KS"
		ksmirnov default_predicted_`model' if post_exp == `cohort', by(treatment)
		local ks = r(p)

		ivreg2 default_predicted_`model' treatment final_stratum_id1-final_stratum_id13 if post_exp == `cohort', small cluster(uid_cluster)
		quietly outreg2 treatment using "Results/Appendix/TableA4_PanelB.out", `panelB_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control', "KS", `ks') dec(3)
		local panelB_mode append

	}

}

restore


****************************************
*** PART-3 *** Estimating the treatment's impact on the quality of reporting on default
****************************************

** Generating datasets with defaults as defined based on survey
* Each dataset has one row per UID_Center with the share of sample rows classified as default
* (default_survey) and the number of sample rows in the center (weight_survey).

* All patients

preserve

keep if sample_temp == 1

* Rows per center, defaults per center, and their ratio
bysort UID_Center: generate weight_survey = _N
by UID_Center: egen total_defaults = total(default)
generate default_survey = total_defaults / weight_survey

* Keep a single row per center, with the center-level variables only
by UID_Center: keep if _n == 1
keep UID_Center default_survey weight_survey

save "Data/Intermediate/Table6_1_input.dta", replace

restore

* Patients enrolled before the beginning of the experiment

preserve

keep if sample_temp == 1 & post_exp == 0

* Rows per center, defaults per center, and their ratio
bysort UID_Center: generate weight_survey = _N
by UID_Center: egen total_defaults = total(default)
generate default_survey = total_defaults / weight_survey

* Keep a single row per center, with the center-level variables only
by UID_Center: keep if _n == 1
keep UID_Center default_survey weight_survey

save "Data/Intermediate/TableB2_1_input.dta", replace

restore


** Generating datasets with defaults as defined based on government registers
* Each dataset has one row per UID_Center with the share of verified outcomes recorded as default
* (default_verif) and the number of rows with a verified outcome in the center (weight_verif).

* The sample flag is redefined for this measure: verified patients, first attempt only,
* excluding rows that have both q3_city_code == 24 and q2_state_code == 13
drop sample_temp
gen sample_temp = (q3_city_code ~= 24 | q2_state_code ~= 13) & verified_pat == 1 & first_attempt == 1

* Flag for rows where a verified outcome is recorded
gen verification_available = (out_come_verify ~= .)

* default_ver is 1 when the verified outcome has code 3, 0 for any other recorded code,
* and missing when no verified outcome is recorded
ta out_come_verify
gen default_ver = (out_come_verify == 3) if verification_available == 1

* All patients

preserve

keep if sample_temp == 1 & default_ver ~= .

so UID_Center
by UID_Center: gen weight_verif = _N
by UID_Center: egen total_defaults = total(default_ver)
* The denominator is spelled out in full (weight_verif): the shortened form "weight_ver" only works
* through variable abbreviation and fails when abbreviation is turned off (set varabbrev off)
gen default_verif = total_defaults / weight_verif

* Keep a single row per center
by UID_Center: gen temp = _n
keep if temp == 1

keep UID_Center default_verif weight_verif

save "Data/Intermediate/Table6_3_input.dta", replace

restore

* Patients enrolled before the beginning of the experiment

preserve

keep if sample_temp == 1 & post_exp == 0 & default_ver ~= .

so UID_Center
by UID_Center: gen weight_verif = _N
by UID_Center: egen total_defaults = total(default_ver)
* Full variable name used for the denominator, for the same reason as above
gen default_verif = total_defaults / weight_verif

* Keep a single row per center
by UID_Center: gen temp = _n
keep if temp == 1

keep UID_Center default_verif weight_verif

save "Data/Intermediate/TableB2_2_input.dta", replace

restore


** Generating datasets with defaults as defined based on program data

use "Data/Original/centerwise_sheets.dta", clear

* Replacing Unique_ID information for the center * months for which it is missing:
* the existing Unique_ID is dropped and taken from the monthly roster instead
drop Unique_ID
merge m:1 UID_Center MonthIntoExp using "Data/Intermediate/hw_roster_by_month.dta"
tabulate _merge
tabulate MonthIntoExp if _merge == 1
drop if _merge == 2
drop _merge

* Defining alternative outcomes, at the center level, not center x area level, for merge with patient data (since we don't know which mobile area a patient went to in the patient data)

* The two groups of months that are aggregated separately
local months_below_101 "MonthIntoExp<101 & MonthIntoExp ~= ."
local months_from_101 "MonthIntoExp>=101 & MonthIntoExp ~= ."

* Months below 101: total outcomes (weight_oa) and default rate (default_oa) per center
bysort UID_Center: egen weight_oa = total(TotOutcome) if `months_below_101'
bysort UID_Center: egen total_defaults = total(Default) if `months_below_101'

generate default_oa = total_defaults / weight_oa
* A center with zero outcomes gets a default rate of 0 rather than missing
replace default_oa = 0 if default_oa == . & weight_oa == 0

* Months from 101 onwards: default rate per center, copied to every row of the center as b_default_oa
bysort UID_Center: egen outcomes_from_101 = total(TotOutcome) if `months_from_101'
bysort UID_Center: egen defaults_from_101 = total(Default) if `months_from_101'
generate rate_from_101 = defaults_from_101 / outcomes_from_101
replace rate_from_101 = 0 if rate_from_101 == . & outcomes_from_101 == 0
by UID_Center: egen b_default_oa = max(rate_from_101)
drop outcomes_from_101 defaults_from_101 rate_from_101

keep if `months_below_101'

* Keep a single row per center, with the center-level variables only
bysort UID_Center: keep if _n == 1
keep UID_Center default_oa b_default_oa weight_oa

save "Data/Intermediate/Table6_2_input.dta", replace


** Comparing impact on default measured using multiple data sources

* With weights, main specification: Table 6

* Assemble the center-level analysis file: the three center-level input files,
* the health worker roster, and the health worker control variables.
use "Data/Intermediate/Table6_2_input.dta", clear

merge 1:1 UID_Center using "Data/Intermediate/Table6_1_input.dta"
ta _merge
drop _merge

merge 1:1 UID_Center using "Data/Intermediate/Table6_3_input.dta"
ta _merge
drop _merge

// Adding information about the health worker in charge of the center at baseline, for the merge with the health worker control variables
* Only the MonthIntoExp == 1 roster row of each center is kept; centers without such a row are dropped.
merge 1:m UID_Center using "Data/Intermediate/hw_roster_by_month.dta"
ta _merge
drop _merge
keep if MonthIntoExp == 1
drop MonthIntoExp

* NOTE(review): unlike the Table B2 section further down, rows found only in
* hw_roster_by_center.dta (_merge == 2) are not dropped here - confirm this is intended.
merge 1:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta"
ta _merge
drop _merge

merge m:1 Unique_ID using "Data/Intermediate/hw_controls.dta"
ta _merge
drop if _merge == 2
drop _merge

* Control lists are stored with the copy form of -global- (no "="): the "=" form sends
* the text through the string-expression evaluator, which is unnecessary for a literal
* varlist and silently truncates long strings on older Stata releases.
global hw_controls "b_age_couns b_jobs_bef_oa_couns male_couns gen_caste_couns hindu_couns twelveandbelow_couns tertiary_couns hhd_size_couns ownhouse_couns b_age_dum_couns b_jobs_bef_oa_dum_couns male_dum_couns gen_caste_dum_couns hindu_dum_couns twelveandbelow_dum_couns tertiary_dum_couns hhd_size_dum_couns ownhouse_dum_couns"

global hw_controls_2 "b_jobs_bef_oa_couns b_age_couns gen_caste_couns caste_st_couns religion_oth_couns same_nbhd_couns gen_caste_dum_couns caste_st_dum_couns religion_oth_dum_couns b_jobs_bef_oa_dum_couns same_nbhd_dum_couns b_age_dum_couns"

* Gaps between the survey-based default rate and the two other measures
gen default_diff_oa = default_survey - default_oa
gen default_diff_verif = default_survey - default_verif

* Table 6: two columns per outcome, in the order listed below.
*   column 1: treatment + strata dummies
*   column 2: same, adding the health worker controls in $hw_controls
* The two outcomes built from default_oa additionally control for b_default_oa.
* Every column reports the weighted control-group mean of its outcome.
* NOTE(review): all outcomes, including the default_oa ones, are weighted by
* weight_survey (weight_oa is not used) - confirm this is intended.

local t6_mode "replace"		// the first column creates the file, later ones append to it

foreach outcome in default_survey default_oa default_diff_oa default_verif default_diff_verif {

	local t6_extra ""
	if inlist("`outcome'", "default_oa", "default_diff_oa") {
		local t6_extra "b_default_oa"
	}

	su `outcome' [aw=weight_survey] if treatment == 0
	local mean_control=r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `t6_extra' [aw=weight_survey], small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table6.out", `t6_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local t6_mode "append"

	* Strata fixed effects and health worker controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `t6_extra' $hw_controls [aw=weight_survey], small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table6.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

}

* With weights, and with second set of controls: Table E4
* Same layout as Table 6 (two columns per outcome, in the order listed below),
* except that the second column of each pair uses $hw_controls_2.
* The two outcomes built from default_oa additionally control for b_default_oa.

local e4_mode "replace"		// the first column creates the file, later ones append to it

foreach outcome in default_survey default_oa default_diff_oa default_verif default_diff_verif {

	local e4_extra ""
	if inlist("`outcome'", "default_oa", "default_diff_oa") {
		local e4_extra "b_default_oa"
	}

	* Weighted control-group mean reported under both columns of this outcome
	su `outcome' [aw=weight_survey] if treatment == 0
	local mean_control=r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `e4_extra' [aw=weight_survey], small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableE4.out", `e4_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local e4_mode "append"

	* Strata fixed effects and the second set of health worker controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `e4_extra' $hw_controls_2 [aw=weight_survey], small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableE4.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

}

* Without weights: Table A9
* Same layout as Table 6 (two columns per outcome, in the order listed below),
* but neither the control-group means nor the regressions are weighted.
* The two outcomes built from default_oa additionally control for b_default_oa.

local a9_mode "replace"		// the first column creates the file, later ones append to it

foreach outcome in default_survey default_oa default_diff_oa default_verif default_diff_verif {

	local a9_extra ""
	if inlist("`outcome'", "default_oa", "default_diff_oa") {
		local a9_extra "b_default_oa"
	}

	* Unweighted control-group mean reported under both columns of this outcome
	su `outcome' if treatment == 0
	local mean_control=r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `a9_extra', small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA9.out", `a9_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local a9_mode "append"

	* Strata fixed effects and health worker controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `a9_extra' $hw_controls, small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA9.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

}

* Wild cluster bootstrap or pairs cluster bootstrap, without weights: Tables C4 and D4
* Re-estimates the unweighted specifications (same outcomes and regressors as Table A9)
* with -wildbootstrap regress- and with -clustse regress, method(pairs)-, writing the
* output of each method to its own log file. The renames below are undone by -restore-.

preserve

* Health worker controls under shortened names (the trailing "couns" is removed).
* Copy form of -global- (no "="), so the list is stored verbatim without going
* through the string-expression evaluator.
global hw_controls_b "b_age_ b_jobs_bef_oa_ male_ gen_caste_ hindu_ twelveandbelow_ tertiary_ hhd_size_ ownhouse_ b_age_dum_ b_jobs_bef_oa_dum_ male_dum_ gen_caste_dum_ hindu_dum_ twelveandbelow_dum_ tertiary_dum_ hhd_size_dum_ ownhouse_dum_"

* The rename loop reads the list above, so the two cannot drift apart
foreach stub of global hw_controls_b {
rename `stub'couns `stub'
}

* wild cluster bootstrap: Table C4

* Named log: it can be opened even when another (unnamed) log is already running, and the
* guarded close clears a copy left open by a previous run that stopped before -log close-.
capture log close tableC4
log using "Results/Appendix/TableC4.log", replace name(tableC4)

wildbootstrap regress default_survey treatment final_stratum_id1-final_stratum_id13, cluster(uid_cluster) reps(5000) rseed(13915183)
wildbootstrap regress default_survey treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)

* Outcomes built from default_oa also control for b_default_oa
foreach var in default_oa default_diff_oa {
wildbootstrap regress `var' treatment final_stratum_id1-final_stratum_id13 b_default_oa, cluster(uid_cluster) reps(5000) rseed(13915183)
wildbootstrap regress `var' treatment final_stratum_id1-final_stratum_id13 b_default_oa $hw_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)
}

foreach var in default_verif default_diff_verif {
wildbootstrap regress `var' treatment final_stratum_id1-final_stratum_id13, cluster(uid_cluster) reps(5000) rseed(13915183)
wildbootstrap regress `var' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)
}

log close tableC4

* pairs cluster bootstrap: Table D4

capture log close tableD4
log using "Results/Appendix/TableD4.log", replace name(tableD4)

clustse regress default_survey treatment final_stratum_id1-final_stratum_id13, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
clustse regress default_survey treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)

* Outcomes built from default_oa also control for b_default_oa
foreach var in default_oa default_diff_oa {
clustse regress `var' treatment final_stratum_id1-final_stratum_id13 b_default_oa, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
clustse regress `var' treatment final_stratum_id1-final_stratum_id13 b_default_oa $hw_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
}

foreach var in default_verif default_diff_verif {
clustse regress `var' treatment final_stratum_id1-final_stratum_id13, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
clustse regress `var' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
}

log close tableD4

restore

* With weights, patients enrolled before the beginning of the experiment: Table B2

* Assemble the center-level analysis file from the two Table B2 input files,
* the health worker roster, and the health worker control variables.
use "Data/Intermediate/TableB2_1_input.dta", clear

merge 1:1 UID_Center using "Data/Intermediate/TableB2_2_input.dta"
ta _merge
drop _merge

// Adding information about the health worker in charge of the center at baseline, for the merge with the health worker control variables
* Only the MonthIntoExp == 1 roster row of each center is kept; centers without such a row are dropped.
merge 1:m UID_Center using "Data/Intermediate/hw_roster_by_month.dta"
ta _merge
drop _merge
keep if MonthIntoExp == 1
drop MonthIntoExp

merge 1:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta"
ta _merge
drop if _merge == 2
drop _merge

merge m:1 Unique_ID using "Data/Intermediate/hw_controls.dta"
ta _merge
drop if _merge == 2
drop _merge

* Control list stored with the copy form of -global- (no "="): the "=" form sends the
* text through the string-expression evaluator, which is unnecessary for a literal
* varlist and silently truncates long strings on older Stata releases.
global hw_controls "b_age_couns b_jobs_bef_oa_couns male_couns gen_caste_couns hindu_couns twelveandbelow_couns tertiary_couns hhd_size_couns ownhouse_couns b_age_dum_couns b_jobs_bef_oa_dum_couns male_dum_couns gen_caste_dum_couns hindu_dum_couns twelveandbelow_dum_couns tertiary_dum_couns hhd_size_dum_couns ownhouse_dum_couns"

* Gap between the survey-based and the verification-based default rates
gen default_diff_verif = default_survey - default_verif

* Table B2: two columns per outcome, in the order listed below.
*   column 1: treatment + strata dummies
*   column 2: same, adding the health worker controls in $hw_controls
* Every column reports the weighted control-group mean of its outcome.

local b2_mode "replace"		// the first column creates the file, later ones append to it

foreach outcome in default_survey default_verif default_diff_verif {

	su `outcome' [aw=weight_survey] if treatment == 0
	local mean_control=r(mean)

	* Strata fixed effects only
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 [aw=weight_survey], small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableB2.out", `b2_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes) adec(3) dec(3) keep(treatment)
	local b2_mode "append"

	* Strata fixed effects and health worker controls
	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 $hw_controls [aw=weight_survey], small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableB2.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

}